library(plyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.4     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::arrange()   masks plyr::arrange()
## x purrr::compact()   masks plyr::compact()
## x dplyr::count()     masks plyr::count()
## x dplyr::failwith()  masks plyr::failwith()
## x dplyr::filter()    masks stats::filter()
## x dplyr::id()        masks plyr::id()
## x dplyr::lag()       masks stats::lag()
## x dplyr::mutate()    masks plyr::mutate()
## x dplyr::rename()    masks plyr::rename()
## x dplyr::summarise() masks plyr::summarise()
## x dplyr::summarize() masks plyr::summarize()
library(stringr)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following objects are masked from 'package:plyr':
## 
##     arrange, mutate, rename, summarise
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# import dataset
# tran <- read.csv(file = './data/transactions.csv')
# Load the per-account table. Blank cells are mapped to NA for every column
# type: by default read.csv() only does that for numeric/logical columns, so a
# blank loan_status would arrive as "" and slip past the is.na() default below.
# stringsAsFactors = FALSE keeps loan_status a character vector on R < 4.0,
# where assigning the new level "None" to a factor would produce NA instead.
accounts <- read.csv(
  file = "./data/accounts_analytical.csv",
  na.strings = c("", "NA"),
  stringsAsFactors = FALSE
)

# Defaults for missing values: no credit-card count -> 0, no loan status -> "None".
accounts$credit_cards[is.na(accounts$credit_cards)] <- 0
accounts$loan_status[is.na(accounts$loan_status)] <- "None"

# Frequency table of the credit-card counts (not used by the plot below).
credit_cards <- table(accounts$credit_cards)

# Midpoint between the maximum and minimum balance of each account.
accounts[["avg_balance"]] <- (accounts$max_balance + accounts$min_balance) / 2

# Scatter of average balance against number of credit cards, coloured by loan
# status. mode = "markers" is what plotly picks on its own; stating it avoids
# the "No scatter mode specified" message. Columns are referenced through
# formulas so they are resolved against `data`.
fig <- plot_ly(
  data = accounts,
  x = ~avg_balance,
  y = ~credit_cards,
  type = "scatter",
  mode = "markers",
  alpha = 1,
  text = ~paste("Loan: ", loan_status),
  color = ~loan_status
)

# Credit-card counts are whole numbers, so place one y tick per integer from 0.
fig <- fig %>%
  layout(
    title = "Characteristic of accounts",
    yaxis = list(dtick = 1, tick0 = 0, tickmode = "linear")
  )

fig
## No scatter mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
# Python for this task

# Load the per-account table.
acc = pd.read_csv("./data/accounts_analytical.csv")

# Fill missing values by assigning the result back to the column. Calling
# fillna(..., inplace=True) on `acc.column` is a chained in-place operation:
# it emits a FutureWarning on pandas 2.x and leaves `acc` unchanged once
# Copy-on-Write is enabled.
acc["credit_cards"] = acc["credit_cards"].fillna(0)
acc["loan_status"] = acc["loan_status"].fillna("none")

# Midpoint between the maximum and minimum balance of each account.
acc["avg_balance"] = (acc["max_balance"] + acc["min_balance"]) / 2

# categorize by loan status
cr0 = acc[acc["loan_status"] == "none"]
cr1 = acc[acc["loan_status"] == "current"]
cr2 = acc[acc["loan_status"] == "expired"]

# One scatter series per loan status so each gets its own colour and legend entry.
for status_label, status_rows in (("none", cr0), ("current", cr1), ("expired", cr2)):
    plt.scatter(status_rows["avg_balance"], status_rows["credit_cards"], label=status_label)
plt.legend()
plt.yticks([0, 1, 2])
## ([<matplotlib.axis.YTick object at 0x7ffe4eae1e10>, <matplotlib.axis.YTick object at 0x7ffe4eae19e8>, <matplotlib.axis.YTick object at 0x7ffe56527e80>], [Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, '')])
plt.xlabel("avg_balance")
plt.ylabel("Number of credit cards")
plt.title("Characteristic of accounts")
plt.show()

```